function MDoutput = fct_MD_estimation(MDinput)
% FCT_MD_ESTIMATION  Estimate the distribution g of (mu,sigma) on a grid
% from the observed joint distribution of pairs of spells.
%
% The estimate solves a non-negative least squares problem
%       min_g || W (Lik - phi_data*H) g ||^2 + lambda_sum^2 (sum(g) - 1)^2,
%       s.t. g >= 0,
% optionally followed by a Tykhonov-regularized re-estimation for every
% value in lambda_Tykhonov_vec.
%
% INPUT: MDinput, a struct. Every field is unpacked into a local variable
% of the same name. Fields read by this function:
%   data_smooth           : matrix; column 1 = t1, column 2 = t2,
%                           column 3 = mass of spells observed at (t1,t2)
%   TL, TU                : minimum and maximum length of spells to be used
%   muL, muU, Nmu         : bounds and number of points of the mu grid
%   sigmaL, sigmaU, Nsigma: bounds and number of points of the sigma grid
%   use_log_scale         : 1 = grids equally spaced in logs, else in levels
%   use_CDF_only, use_pdf_only, use_CDF_when_possible
%                         : flags selecting how the likelihood is built
%   weight_max            : cap on the weight 1/phi given to an observation
%   lambda_sum            : penalty weight enforcing sum(g) = 1
%   lambda_Tykhonov_vec   : vector of Tykhonov penalties (all zero = none)
%   lambda_g_smooth       : smoothing parameter passed to fct_HP_2dim
%   g_low                 : threshold below which g is treated as zero
%   print_statistics      : 1 = run consistency checks and print statistics
%   produce_graphs        : 1 = draw the diagnostic figures
%   filename_out          : file to which MDinput and MDoutput are saved
%
% OUTPUT: MDoutput, a struct with the estimates (g, gp, g_pp, mu_pp, ...),
% the grids, the fit statistics and the fitted distributions. It is an
% empty struct when a consistency check fails.
%
% Requires fct_CDF_F_capped, fct_pdf_f and fct_HP_2dim on the path.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% make sure the output exists even if we leave early after a failed check
MDoutput = struct();

% retrieve inputs: one local variable per field of MDinput
% (field names are valid identifiers, so the eval cannot inject code)
    mm_mat = fieldnames( MDinput );
    for i = 1 : length(mm_mat)
        eval([mm_mat{i} '= MDinput.(mm_mat{i});']);
    end

% define some useful parameters
N = Nmu*Nsigma;          % number of (mu,sigma) grid points
Big_T = TU - TL + 1;     % number of durations in [TL,TU]
n_lambda = length(lambda_Tykhonov_vec);
use_tykhonov = norm(lambda_Tykhonov_vec) > 0;

%%%%% check that choices are consistent
% (as in the original code, checks are only run when print_statistics == 1)
if print_statistics == 1
    n_options = sum(use_CDF_only+use_pdf_only+use_CDF_when_possible);
    if n_options == 0
        disp('need to choose one option')
        return
    end
    if n_options ~= 1
        disp('choose only one option to use')
        return
    end
    if use_tykhonov && use_log_scale == 1
       disp('To use Tykhonov regularization the grid for the parameters must be in natural units')
       disp('set use_log_scale to zero')
       return
    end
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% READ AND PROCESS DATA
% create vector phi(t1,t2) for estimation
% it contains durations between TL and TU, and t1<=t2
select = (data_smooth(:,1)>= TL ) & (data_smooth(:,1) <= TU ) & ...
         (data_smooth(:,2)>= TL ) & (data_smooth(:,2) <= TU ) & ...
         (data_smooth(:,1)<= data_smooth(:,2) );
t1_data      = data_smooth(select,1);
t2_data      = data_smooth(select,2);
phi_data_raw = data_smooth(select,3);

% ensures that data is non-negative:
if min(phi_data_raw) < 0
    disp(' ');
    disp('Some entries of data density are negative, we have replaced them by zeros')
    disp(' ');
    phi_data_raw = max(phi_data_raw,0);
end

% Compute marginal distribution, survivor and hazard in the data
% (diagnostics only; they are not part of MDoutput)
marg_distr_data  = zeros(Big_T,1);
surv_data        = ones(Big_T,1);
t_hazard = unique(data_smooth(:,1));   % sorted distinct first durations
Npeople = sum(data_smooth(:,3));

% do not index past the number of distinct durations present in the data
for i=1:min(Big_T,numel(t_hazard))
    marg_distr_data(i) = sum(data_smooth(data_smooth(:,1)==t_hazard(i),3))/Npeople;
end

for i=2:Big_T
     surv_data(i) = 1 - sum(marg_distr_data(1:i-1)) ;
end
hazard_data = marg_distr_data ./ surv_data ; %#ok<NASGU>

% phi_data is a vector with the data
% phi(t1,t2) = fraction of spells of duration (t1,t2).
% Since it sums to one, the interpretation is that it is conditional on
% t1 <= t2 and TL <= t1,t2 <= TU.
phi_data = phi_data_raw/sum(phi_data_raw);

% vector of equally spaced spell durations
t_values= TL:1:TU;

% row vectors with the values of t1 and t2 for each observed pair of spells
t1_vec = t1_data';
t2_vec = t2_data';
M = length(t1_vec); % number of different (t1,t2) pairs used

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% grid for mu and grid for sigma
%
% linspace guarantees exactly Nmu (Nsigma) points, also when the grid has
% a single point, which the colon operator with a computed step does not.
if use_log_scale == 1 % set grid of mu and sigma in logs
    mu_values    = exp(linspace(log(muL),    log(muU),    Nmu));
    sigma_values = exp(linspace(log(sigmaL), log(sigmaU), Nsigma));
else % set grid of mu and sigma in levels
    mu_values    = linspace(muL,    muU,    Nmu);
    sigma_values = linspace(sigmaL, sigmaU, Nsigma);
end

% mu_vec and sigma_vec hold the value of mu and sigma for each combination
% (mu,sigma). Ordering: k = (i-1)*Nsigma + j  <->  (mu_values(i), sigma_values(j)),
% i.e. sigma varies fastest.
mu_vec    = reshape(repmat(mu_values, Nsigma, 1), 1, N);
sigma_vec = repmat(sigma_values, 1, Nmu);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Create matrix with likelihood function L
%
% Model is represented by 0 = (L - phi*H')*g
% for each t1, t2, mu, sigma.
%
% L(k,kk) = prob{  (t1,t2)=(t1_vec(k),t2_vec(k)) , mu=mu_vec(kk), sigma=sigma_vec(kk)}
%         = f(t1,mu,sigma) * f(t2,mu,sigma)
%
LL   = zeros(M,N);  % based on differences of the CDF F
L    = zeros(M,N);  % based on density f

% t1_vec, t2_vec: times at which the data (phi) is measured
step_t = 1;                   % spacing of t_values (TL:1:TU)
t1v = t1_vec  + step_t ;      % time at the "end" of the time periods
t2v = t2_vec  + step_t ;
t1_pdf = t1_vec + step_t/2;   % time in the middle of the time periods
t2_pdf = t2_vec + step_t/2;
t_min =  t_values(1)/100;     % minimum value of time
t1v_minus = max(t1v - step_t , t_min);  % time at the "beginning" of time periods
t2v_minus = max(t2v - step_t , t_min);

no_num_LL = zeros(N,1);        % 1 if the CDF-based column contains NaN
pos_L_indicator = zeros(1,N);  % 1 if the pdf-based column has positive mass

for kk=1:N
    sigma = sigma_vec(kk);
    mu    = mu_vec(kk);
    %%% LL = product of differences of CDF's, one difference for each spell
    LL(:,kk) =  max((fct_CDF_F_capped(t1v,mu,sigma) - fct_CDF_F_capped(t1v_minus,mu,sigma)),0) .* ...
                max((fct_CDF_F_capped(t2v,mu,sigma) - fct_CDF_F_capped(t2v_minus,mu,sigma)),0) ;
    %%% L uses densities
    L(:,kk)  = max(fct_pdf_f(t1_pdf,mu,sigma),0) .* max(fct_pdf_f(t2_pdf,mu,sigma),0);

    if any(isnan(LL(:,kk)))
        no_num_LL(kk) = 1;
    end
    % make sure we have types with a positive mass:
    % sum_{t1,t2} f(t1,mu,sigma)f(t2,mu,sigma) > 0
    if sum(L(:,kk)) > 1e-100
        pos_L_indicator(kk) = 1;
    end
end

% Remove columns with zero (or very small) mass from L and LL. That is,
% delete pairs (mu,sigma) whose likelihood is zero everywhere.
% We also construct the vector H.
N_zero = N - sum(pos_L_indicator) ;

if use_CDF_only == 1
    % also drop the types for which the CDF could not be evaluated
    pos_L_indicator = pos_L_indicator.*(1-no_num_LL');
end

keep_cols     = (pos_L_indicator == 1);
Npos          = sum(keep_cols);
if Npos == 0
    error('fct_MD_estimation:noTypes', ...
          'No (mu,sigma) grid point has positive likelihood on the selected spells.');
end

Lpos          = L(:,keep_cols);
LLpos         = LL(:,keep_cols);
mu_vec_pos    = mu_vec(keep_cols);
sigma_vec_pos = sigma_vec(keep_cols);

% H(k) = prob that both spells of type k fall in [TL,TU]
H = zeros(1,Npos);
for kpos = 1:Npos
    H(kpos) = ( fct_CDF_F_capped(TU,mu_vec_pos(kpos),sigma_vec_pos(kpos)) ...
              - fct_CDF_F_capped(TL,mu_vec_pos(kpos),sigma_vec_pos(kpos)) )^2;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Solve for distribution of mu and sigma, labelled as g or gp.
%
% gp = arg min_g || d - C g ||  s.t. g >= 0 and sum g = 1,
% where g has dimension Npos.
%
if use_pdf_only ==1
    C = Lpos - phi_data*H ;
else
    C = LLpos - phi_data*H;
end
d   = zeros(M,1);

% Weighting scheme: observation (t1,t2) gets weight (1/phi(t1,t2))^(1/w),
% where 1/phi is capped at weight_max (an input parameter) and set to one
% where phi is zero.
w = 2.5;
phi_1over  = ones(M,1);
phi_1over(phi_data>0) = 1./phi_data(phi_data>0);
phi_1over   = min(phi_1over,weight_max);
% scale the rows of C directly instead of building a dense M-by-M diagonal
C           = bsxfun(@times, phi_1over.^(1/w), C);

% fit gp by minimizing || d - C g|| subject to g >= 0;
% sum g = 1 is imposed through a penalty on (sum g - 1), the (M+1)-th equation
CC         = [C ; ones(1,Npos)*lambda_sum];
dd         = zeros(M+1,1);
dd(M+1)    = lambda_sum;

% non-negative least squares
options = optimset('TolFun',1e-15,'TolX',1e-10);
[gp,RESNORM,RESIDUAL,EXITFLAG,OUTPUT,LAMBDA] = lsqnonneg(CC,dd,options) ; %#ok<ASGLU>
rankdef = 0;
MDoutput.rankdef=rankdef;

% display condition number of CC (an SVD, so only computed when printed)
if print_statistics == 1
    cond_num_nonreg = cond(CC);
    disp(sprintf('condition number before regularization %30d',cond_num_nonreg))
    disp(sprintf('sum of g: %7.5f',sum(gp)))
end

% just in case, normalize and keep the non-negative g only
gp = max(gp,0)/sum(max(gp,0));


%--------------------------------------------------------------------------
% Tykhonov regularization
%
% (1) Select the (mu,sigma) pairs to use in the regularization, to reduce
%     the dimensionality of the problem. The estimated g is smoothed with a
%     two-sided HP filter; we keep the pairs with positive estimated
%     probability plus those whose smoothed probability exceeds g_low.
% (2) Solve for g including a penalty lambda_Tykhonov*||g||^2, implemented
%     by appending sqrt(lambda_Tykhonov)*I to the matrix and zeros to the
%     right-hand side.
g_tosmooth_square = [];   % stay empty when no regularization is requested
g_smooth_square   = [];
obj_func     = zeros(1,n_lambda);
norm_g       = zeros(1,n_lambda);
cond_num_reg = zeros(1,n_lambda);

if use_tykhonov

    % put gp on the full grid by adding zeros
    g_tosmooth  = zeros(N,1);
    g_tosmooth(keep_cols) = gp;
    % sigma varies fastest in the grid ordering, so the column-major
    % reshape must be Nsigma-by-Nmu (rows = sigma, columns = mu);
    % reshape(.,Nmu,Nsigma) scrambles the surface unless Nmu == Nsigma.
    g_tosmooth_square = reshape(g_tosmooth,Nsigma,Nmu);

    % HP filter probabilities g
    % (assumes fct_HP_2dim returns a matrix of the same size -- TODO confirm)
    g_smooth_square = fct_HP_2dim(g_tosmooth_square,lambda_g_smooth);
    g_smooth = g_smooth_square(:);

    % keep all positive g(k) and all smoothed g(k) greater than g_low
    indicator_gp        = gp>0;
    g_smooth_select     = g_smooth(keep_cols);
    indicator_gsmooth   = g_smooth_select > g_low;
    select_cols         = indicator_gp | indicator_gsmooth;

    % prepare matrices for Tykhonov regularization
    CC_old  = CC(:,select_cols);
    % NOTE(review): Npos is overwritten with the number of selected columns,
    % so MDoutput.Npos reports this value while gp keeps its full length.
    Npos    = sum(select_cols);

    for j=1:n_lambda
        lambda_Tykhonov = lambda_Tykhonov_vec(j);
        % sqrt(lambda_Tykhonov)*eye(Npos) implements the Tykhonov penalty
        CC_reg  = [CC_old ; sqrt(lambda_Tykhonov)*eye(Npos)];
        dd_reg  = [dd     ; zeros(Npos,1) ] ;

        % regularized problem solved as a non-negative LS
        options = optimset('TolFun',1e-20,'TolX',1e-20);
        gp_tyk  = lsqnonneg(CC_reg,dd_reg,options) ;

        % add zeros so that gp_tyk_full and gp have the same length
        gp_tyk_full  = 0*gp;
        gp_tyk_full(select_cols) = gp_tyk;

        % the last lambda in the vector determines the final gp
        gp     = gp_tyk_full;

        % objective function (without the sum-to-one penalty) and norm of g
        obj_func(j) = norm(d-C*gp_tyk_full)^2;
        norm_g(j)   = norm(gp_tyk_full)^2 ;

        % display output
        if print_statistics == 1
            cond_num_reg(j) = cond(CC_reg);
            disp(sprintf('condition number after regularization %7.2f',cond_num_reg(j)))
            disp(sprintf('sum  of g: %7.5f',sum(gp_tyk_full)))
            disp(sprintf('norm of g: %7.5f',norm_g(j)))
        end
    end
end

% L curve: only meaningful (and obj_func/norm_g only filled) when the
% regularization was actually run for several lambdas
if n_lambda > 1 && use_tykhonov
    figure(101);
    pl=plot(log(obj_func),log(norm_g),'-*');
    set(pl,'Linewidth',2);
    ylabel('$\log ||g_\lambda||^2$','Interpreter','Latex','Fontsize',16);
    xlabel('$\log||\phi -F \, g_{\lambda} ||^2$','Interpreter','Latex','Fontsize',16);
    title('L curve: $g_{\lambda} = \arg\min_{g\geq0 \, ,\, \sum g = 1\, } \ ||\phi - F\, g ||^2 + \lambda ||g ||^2$ for several $\lambda$', ...
          'Interpreter','Latex','Fontsize',16);
    % x axis is log(obj_func), y axis is log(norm_g)
    text(mean(log(obj_func)), max(log(norm_g)),'$F$ is the discretization of the integral equation', ...
         'Interpreter','Latex','Fontsize',16);
    vertsp = ( max(log(norm_g)) - min(log(norm_g)) )/50;
    for i=1:n_lambda
        text(log(obj_func(i)),log(norm_g(i))+vertsp,[' \lambda = ', num2str(lambda_Tykhonov_vec(i))]);
    end
    print -depsc  fig_L_curve
end

%--------------------------------------------------------------------------
gp = max(gp,0)/sum(max(gp,0));

% construct a g with values also at the grid points that are not used
g = zeros(N,1);
g(keep_cols) = gp;

% Compute the implied fraction phi_hat for the model
if use_pdf_only == 1
    phi_hat_adj = ( L * g)/(H*gp);
else
    phi_hat_adj = ( LL * g)/(H*gp);
end


% construct matrices Z_phi_data(i,j) = phi_data(t_values(i), t_values(j))
% and Z_phi_hat(i,j) = phi_hat(t_values(i), t_values(j))
% this allows to build surface graphs.
% The fill assumes the selected data lists every pair t1<=t2 in [TL,TU],
% ordered by t1 and then by t2.
Nt=TU-TL+1;
M_expected = Nt*(Nt+1)/2;
if M < M_expected
    error('fct_MD_estimation:incompleteData', ...
          'Expected %d (t1,t2) pairs with TL<=t1<=t2<=TU in data_smooth, found %d.', ...
          M_expected, M);
elseif M > M_expected
    warning('fct_MD_estimation:extraData', ...
            'data_smooth has %d selected rows, more than the %d expected pairs; extra rows are ignored.', ...
            M, M_expected);
end
Z_phi_data=zeros(Nt,Nt);
Z_phi_hat =zeros(Nt,Nt);
beg_k = 1;
for i=1:Nt
    end_k = beg_k + (Nt-i);
    Z_phi_data(i,i:end) = phi_data_raw(beg_k:end_k);
    Z_phi_hat(i,i:end)  = phi_hat_adj(beg_k:end_k);
    beg_k = end_k+1;
end
%%% symmetrize: copy the upper triangle into the lower triangle
Z_phi_data = triu(Z_phi_data) + triu(Z_phi_data,1)';
Z_phi_hat  = triu(Z_phi_hat)  + triu(Z_phi_hat,1)';

z_phi_hat  = Z_phi_hat;
z_phi_data = Z_phi_data/sum(sum(Z_phi_data));
% scale Z_phi_hat so that it is interpreted as number of spells
Z_phi_hat = z_phi_hat * sum(sum(Z_phi_data));


%%%% marginal distribution of spells from data and from fitted model
z_marg_data = sum(z_phi_data,2);
z_marg_hat  = sum(z_phi_hat,2);

%%%% computes fit of model
% (proportional errors are Inf/NaN wherever z_phi_data is zero)
err_abs  = z_phi_hat - z_phi_data;
err_prop = err_abs ./ z_phi_data;
% square the errors BEFORE averaging (root mean square error)
mean_sqr_error      = sqrt(mean(mean(err_abs.^2)));
avg_phi_data        = mean(mean(z_phi_data));
mean_sqr_error_prop = sqrt(mean(mean(err_prop.^2)));
mean_abs_abs_error  = mean(mean(abs(err_abs)));
mean_abs_prop_error = mean(mean(abs(err_abs)./z_phi_data));
max_abs_error       = max(max(abs(err_abs))) ;
max_prop_error      = max(max(abs(err_prop))) ;

% computes some statistics for the parameter values :
mean_mu    =  mu_vec * g  ;
mean_1_mu  =  1./mu_vec * g  ;
mean_sigma =  sigma_vec *g ;

var_mu     =  mu_vec.^2 * g    - mean_mu^2 ;
var_1_mu   =  (1./mu_vec).^2 * g    - mean_1_mu^2 ;
var_sigma  =  sigma_vec.^2 *g  - mean_sigma^2 ;
std_mu     = sqrt(var_mu);
std_sigma  = sqrt(var_sigma);
std_1_mu   = sqrt(var_1_mu);
covar_mu_sigma =  mu_vec.*sigma_vec * g - mean_mu*mean_sigma;
corr_mu_sigma  = covar_mu_sigma/(std_mu * std_sigma);

%%% computes alpha and beta
%%% alpha = mu/sigma  and beta = 1/sigma
alpha_vec = mu_vec./sigma_vec;
beta_vec = 1./sigma_vec;

mean_alpha       =  alpha_vec * g  ;
mean_beta        =  beta_vec * g ;
var_alpha        =  alpha_vec.^2 * g - mean_alpha^2 ;
var_beta         =  beta_vec.^2 *g   - mean_beta^2 ;
std_alpha        = sqrt(var_alpha);
std_beta         = sqrt(var_beta);
covar_alpha_beta =  alpha_vec .* beta_vec * g - mean_alpha*mean_beta;
corr_alpha_beta  =  covar_alpha_beta/(std_alpha * std_beta);

% matrix Z_g with values of g to build surfaces:
%  Z_g(i,j) = g( mu(i), sigma(j))
Z_g = reshape(g,Nsigma,Nmu)';

% estimate = [ mu, sigma, g, alpha, beta ], sorted by increasing g
estimate = [ mu_vec', sigma_vec',  g ,alpha_vec', beta_vec'];
sort_est = sortrows(estimate,3);

% discard the values of g that are zero, or very small, smaller than g_low
% (if every g is below g_low, all rows are kept, as before)
i_min = find(~(sort_est(:,3) < g_low), 1);
if isempty(i_min)
    i_min = 1;
end
estimate_pos = sort_est(i_min:end,:);
estimate_pos(:,3)=estimate_pos(:,3)/sum(estimate_pos(:,3));

%%%%% matrix estimate_pos has 5 columns: mu, sigma, g, alpha and beta
%%%%% it only keeps the grid points with high probability
g_pp     = estimate_pos(:,3);
mu_pp    = estimate_pos(:,1);
sigma_pp = estimate_pos(:,2);
alpha_pp = estimate_pos(:,4);
beta_pp  = estimate_pos(:,5);
Npp      = length(g_pp);

% Computation of hazard rates in the model
% (diagnostics only; they are not part of MDoutput)
Nhaz        = numel(t_hazard);
fgrid_ind   = zeros(Npp,Nhaz);
Fgrid_ind   = zeros(Npp,Nhaz);
Fgrid_ind_T = zeros(1,Npp);
for k = 1:Npp
    mu              = mu_pp(k);
    sigma           = sigma_pp(k);
    fgrid_ind(k,:)  = fct_pdf_f(t_hazard+0.5,mu,sigma);
    Fgrid_ind(k,:)  = fct_CDF_F_capped(t_hazard,mu,sigma);
    % NOTE(review): horizon hard-coded at 260.5 ("tmax = 260") rather
    % than derived from TU -- confirm this is intended.
    Fgrid_ind_T(1,k)= fct_CDF_F_capped(260.5,mu,sigma);
end

Fgrid_T     = Fgrid_ind_T*g_pp;
fgrid       = fgrid_ind'*g_pp;
Fgrid       = Fgrid_ind'*g_pp;
hazard_hat  = fgrid./(1-Fgrid./Fgrid_T); %#ok<NASGU>

surv_marg   = min(cumsum(z_marg_hat),1);
hazard_hat2 = z_marg_hat./(1-surv_marg); %#ok<NASGU>

if print_statistics == 1

    disp(' ' )
    disp('It finished producing the hazard rates in the model  ')
    disp(' ' )

    %%%%% print statistics:
    disp(' ')
    disp(['Sample with shortest spells TL = ', num2str(TL,2),' and  longest spell TU = ' num2str(TU,2)]);
    disp(['Number of spells (below and at diagonal) = ', num2str(M/1000,2), ' (thousands)']);
    disp(' ')
    disp([num2str(Nmu,2),   ' values of mu on interval    ', num2str([muL, muU],2)]);
    disp([num2str(Nsigma,2),' values of sigma on interval ', num2str([sigmaL, sigmaU],2)]);
    disp(' ')

    disp('Measures of fit for phi, across all pairs of spells: ');
    disp(['Square root of mean square absolute error      = ', num2str(mean_sqr_error,3)]);
    disp(['Average size of phi = ',num2str(avg_phi_data,2)]);
    disp(['Sqr root mean squre error / avg phi = ',num2str(mean_sqr_error/avg_phi_data,2)])
    disp(['Square root of mean square proportional error  = ', num2str(mean_sqr_error_prop,3)]);
    disp(['Mean absolute value absolute error     = ', num2str(mean_abs_abs_error,3)]);
    disp(['Mean absouute value proportional error = ', num2str(mean_abs_prop_error,3)]);
    disp(['Maximum absolute error     = ', num2str(max_abs_error,3)]);
    disp(['Maximum proportional error = ', num2str(max_prop_error,3)]);
    disp(' ')

    disp(['Fitted with ', num2str(length(estimate_pos(:,3)),5),' non-zero values of g']);
    if use_CDF_only == 1
        disp('Uses CDF only (discards mu,sigma when not defined) ')
    elseif use_pdf_only == 1
        disp('Uses pdf only (discards mu,sigma when pdf = 0 for all t1,t2) ')
    elseif use_CDF_when_possible==1
        disp('Uses CDF whenever possible, otherwise uses pdf')
    end
    disp(['Number of (mu,sigma) combinaton for which CDF not defined some t  = ', num2str(sum(no_num_LL),4)]);
    disp(['Number of (mu,sigma) combinaton for which pdf =0 for all some t   = ', num2str(N_zero,4)]);
    disp('Distribution of g implies: ')
    disp(['Mean of mu    = ' ,num2str(mean_mu ,2),    ' and St Dev of mu    = ',num2str(std_mu,2)]);
    disp(['Mean of 1/mu  = ' ,num2str(mean_1_mu ,4),  ' and St Dev of 1/mu   = ',num2str(std_1_mu,4)]);
    disp(['Mean of sigma = ' ,num2str(mean_sigma ,2),    ' and St Dev of sigma    = ',num2str(std_sigma,2)]);
    disp(['Mean of alpha    = ' ,num2str(mean_alpha ,2), ' and St Dev of alpha  = ',num2str(std_alpha,2)]);
    disp(['Mean of beta     = ' ,num2str(mean_beta ,4),  ' and St Dev of beta   = ',num2str(std_beta,4)]);
    disp(['Correlation coefficient between (mu, sigma)   = ' ,num2str(corr_mu_sigma,2)]);
    disp(['Correlation coefficient between (alpha, beta) = ' ,num2str(corr_alpha_beta,2)]);
    disp(' ');

end

%%% SAVE OUTPUT
% results
MDoutput.g_pp = g_pp;
MDoutput.mu_pp = mu_pp;
MDoutput.sigma_pp = sigma_pp;
MDoutput.alpha_pp = alpha_pp;
MDoutput.beta_pp = beta_pp;
% inputs
MDoutput.t_values = t_values;
MDoutput.mu_vec = mu_vec;
MDoutput.sigma_vec = sigma_vec;
MDoutput.alpha_vec = alpha_vec;
MDoutput.beta_vec = beta_vec;
MDoutput.mu_values = mu_values;
MDoutput.sigma_values = sigma_values;
MDoutput.N = N;
MDoutput.g = g;
MDoutput.Npos = Npos;
MDoutput.gp = gp;
MDoutput.estimate_pos = estimate_pos;
% error statistics
MDoutput.mean_sqr_error = mean_sqr_error;
MDoutput.mean_sqr_error_prop = mean_sqr_error_prop;
MDoutput.mean_abs_abs_error = mean_abs_abs_error;
MDoutput.mean_abs_prop_error = mean_abs_prop_error;
MDoutput.max_abs_error = max_abs_error;
MDoutput.max_prop_error  = max_prop_error;
% fitted distributions
MDoutput.avg_phi_data = avg_phi_data;
MDoutput.z_marg_data  = z_marg_data;
MDoutput.z_marg_hat   = z_marg_hat;
MDoutput.Z_phi_hat    = Z_phi_hat;
MDoutput.Z_phi_data   = Z_phi_data;

save(filename_out, 'MDinput','MDoutput')

%%%% produce some graphs
if produce_graphs ==1

    % set azimuth and elevation for 3D graphs
    AZ = 120; EL = 30;
    fign=0;

    % Raw and HP-smoothed g (only available when regularization was run)
    fign=fign+1;
    if ~isempty(g_tosmooth_square)
        figure(fign);
        subplot(1,2,1)
        surf(g_tosmooth_square)
        title('vector g from LS nonneg')
        subplot(1,2,2)
        surf(g_smooth_square)
        title('smooth vector g')
    end

    % Data phi in levels
    fign=fign+1;
    figure(fign); su=surf(t_values,t_values,Z_phi_data); view([AZ EL]);
    set(su,'Linewidth',2);
    local_latex_labels('$t_1$ weeks','$t_2$ weeks', ...
        '$\phi(t_1,t_2)$, number of spells','Smooth version of the data');

    % Fitted phi in levels
    fign=fign+1;
    figure(fign); su=surf(t_values,t_values,Z_phi_hat); view([AZ EL]);
    set(su,'Linewidth',2);
    local_latex_labels('$t_1$ in weeks','$t_2$ in weeks', ...
        '$\phi(t_1,t_2)$, number of spells','Fitted distribution of spells');

    % Error in the fit -- level
    fign=fign+1;
    figure(fign); su=surf(t_values,t_values,Z_phi_hat-Z_phi_data); view([AZ EL]);
    set(su,'Linewidth',2);
    local_latex_labels('$t_1$ in weeks','$t_2$ in weeks', ...
        '$\phi(t_1,t_2)$, number of spells','Fitting error -- in levels');

    % Error in the fit -- weighted
    fign=fign+1;
    figure(fign); su=surf(t_values,t_values,(Z_phi_hat-Z_phi_data)./Z_phi_data); view([AZ EL]);
    set(su,'Linewidth',2);
    local_latex_labels('$t_1$ in weeks','$t_2$ in weeks', ...
        '$\phi(t_1,t_2)$, number of spells','Fitting error, in percent');

    % Data phi in logs
    fign=fign+1;
    figure(fign); su=surf(t_values,t_values,log(Z_phi_data)); view([AZ EL]);
    set(su,'Linewidth',2);
    local_latex_labels('$t_1$ in weeks','$t_2$ in weeks', ...
        '$\log{\left( \phi(t_1,t_2) \right)} $ \ of number of spells', ...
        'Smooth version of the data, in logs');

    % Fitted phi in logs
    fign=fign+1;
    figure(fign); su=surf(t_values,t_values,log(Z_phi_hat)); view([AZ EL]);
    set(su,'Linewidth',2);
    local_latex_labels('$t_1$ in weeks','$t_2$ in weeks', ...
        '$log{ \left(\phi(t_1,t_2)\right)} \, $ \ of number of spells', ...
        'Fitted distribution of spells, in logs');

    % Error in the fit -- log differences
    fign=fign+1;
    figure(fign); su=surf(t_values,t_values,log(Z_phi_data)-log(Z_phi_hat)); view([AZ EL]);
    set(su,'Linewidth',2);
    local_latex_labels('$t_1$ in weeks','$t_2$ in weeks', ...
        '$\phi(t_1,t_2)$, number of spells','Fitting error -- log differences');

    % Data and fitted marginal (levels)
    fign=fign+1;
    figure(fign); pl=plot(t_values,z_marg_data,'k*',t_values,z_marg_hat,'b-') ;
    set(pl,'Linewidth',2);
    local_latex_labels('$t$ in weeks','$\phi(t) : $ marginal distribution', ...
        '','Fitted distribution of spells');
    le=legend('data','model');
    set(le,'Interpreter','Latex','Fontsize',16);

    % Data and fitted marginal in logs
    fign=fign+1;
    figure(fign); pl=plot(t_values,log(z_marg_data),'k*',t_values,log(z_marg_hat),'b-');
    set(pl,'Linewidth',2);
    local_latex_labels('$t$ in weeks','$log \,  \phi(t) : $ log of marginal distribution', ...
        '','Fitted distribution of spells, in logs');
    le=legend('data','model');
    set(le,'Interpreter','Latex','Fontsize',16);

    % Fitted distribution g of mu and sigma
    fign=fign+1;
    figure(fign); su=surf(sigma_values,mu_values,Z_g); view([AZ EL]);
    set(su,'Linewidth',2);
    local_latex_labels('$\sigma$ volatility','$\mu$ drift', ...
        '$g(\mu,\sigma)$ probability','Estimated distribution of parameters');

    % Fitted distribution g of mu and sigma, in logs
    fign=fign+1;
    figure(fign); su=surf(sigma_values,mu_values,log(Z_g+0.00000001)); view([AZ EL]);
    set(su,'Linewidth',2);
    local_latex_labels('$\sigma$ volatility','$\mu$ drift', ...
        '$\log g(\mu,\sigma)$ probability','Estimated distribution of parameters');

    fign=fign+1;
    figure(fign); pl=plot(gp,C'*(phi_data-C*gp),'.');
    set(pl,'Linewidth',2);
    local_latex_labels('probability $g$','Derivative of objective functions', ...
        '','Without impossing $\sum g = 1 $');

    % LAMBDA is the dual vector returned by the first (unregularized) solve
    fign = fign+1;
    figure(fign); pl=plot(gp,LAMBDA,'.');
    set(pl,'Linewidth',2);
    local_latex_labels('probability $g$','Derivative of objective functions', ...
        '','Impossing $\sum g = 1 $');

    fign=fign+1;
    figure(fign); pl=plot(gp,gp,'*',gp,gp,'-');
    set(pl,'Linewidth',2);
    local_latex_labels('probabity $g$, adds to one ','probability $g$ unrestricted', ...
        '','Adjusted on g');
end

return


function local_latex_labels(x_txt, y_txt, z_txt, title_txt)
% Label the current axes with LaTeX-interpreted text at font size 16.
% An empty z_txt skips the z label (used for 2-D plots).
xlabel(x_txt,'Interpreter','Latex','Fontsize',16);
ylabel(y_txt,'Interpreter','Latex','Fontsize',16);
if ~isempty(z_txt)
    zlabel(z_txt,'Interpreter','Latex','Fontsize',16);
end
title(title_txt,'Interpreter','Latex','Fontsize',16);
